DEG analysis
fun_DEG_analysis <- function(clinic,exp){
exp <- exp[,clinic$PATIENT_ID]
if(identical(clinic$PATIENT_ID,colnames(exp))){
design <- model.matrix(~0+factor(clinic$MS)) %>% # group matrix
set_colnames(c(levels(factor(clinic$MS)))) %>%
set_rownames(colnames(exp))
y = voom(exp, design, plot = T) # important!
contrast.matrix<-makeContrasts(paste0(c('MS1','MS2'),collapse = "-"),levels = design)
contrast.matrix
fit <- lmFit(y,design) %>% #step1
contrasts.fit(., contrast.matrix) %>% # step2 important!
eBayes(.) # default no trend !!!
nrDEG = topTable(fit, coef=1, n=Inf) %>% # step3
na.omit(tempOutput)
head(nrDEG)
logFC_cutoff <- 0.5
logFC_cutoff <- with(nrDEG,mean(abs( logFC)) + 2*sd(abs( logFC)))
DEG=nrDEG %>%
dplyr::mutate(change=as.factor(ifelse(.$P.Value < 0.05 & abs(.$logFC) > logFC_cutoff,
ifelse(.$logFC > logFC_cutoff ,'UP','DOWN'),'NOT'))) %>%
dplyr::mutate(GENE_NAME=rownames(.))
table(DEG$change)
#KEGG
DEG <- tibble::rownames_to_column(DEG,var = 'Gene_symbol')
dff <- bitr(unique(DEG$Gene_symbol), fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Hs.eg.db)
head(dff)
DEG=merge(DEG,dff,by.x='Gene_symbol',by.y='SYMBOL')
head(DEG)
gene_change= DEG[DEG$change %in% c('UP','DOWN'),'ENTREZID']
kk.change <- enrichKEGG(gene= gene_change,
organism = 'hsa',
pvalueCutoff = 0.05)
kegg_result <- kk.change@result
# GSEA
geneList<-DEG$logFC
names(geneList)=DEG$ENTREZID
geneList=sort(geneList,decreasing = T)
set.seed(123)
KEGG_gsearesult <- gseKEGG(geneList, keyType='kegg',organism = "hsa",nPerm = 1000, minGSSize = 25, maxGSSize = 300,pvalueCutoff = 0.1, pAdjustMethod = "BH",use_internal_data=FALSE,seed = T)
KEGG_gsearesult2 <- setReadable(KEGG_gsearesult,
OrgDb = "org.Hs.eg.db",
keyType = "ENTREZID")
gsea_result <- KEGG_gsearesult2 %>% .@result
# output
return(list(logFC_cutoff,
DEG,
kk.change,
kegg_result,
KEGG_gsearesult,
gsea_result))
}
}
res_DEG <- fun_DEG_analysis(df_clinical_cluster,gene_tpm)

# volcano plot
logFC_cutoff <- res_DEG[[1]]
df_volcano <- res_DEG[[2]]
key_genes <- df_volcano[df_volcano$change != "NOT", ]
# pdf('./gene_differential_analysis/volcano_deg_analysis.pdf',width = 4,height = 5,onefile = FALSE)
ggplot(df_volcano, aes(x = logFC, y = -log10(P.Value), color = change)) +
geom_point(alpha = 0.8, size = 2) +
scale_color_manual(values = c("NOT" = "grey",
"UP" = "#E18727CC",
"DOWN" = "#0072B5CC")) +
labs(title = "Volcano Plot",x = "Log2 Fold Change",y = "-Log10(P-value)") +
geom_vline(xintercept = c(-logFC_cutoff, logFC_cutoff), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(data = key_genes, aes(label = GENE_NAME), size = 3) +
theme_few() +
theme(
legend.title = element_blank(),
legend.position = "top"
)

# dev.off()
# gsea plot
gsea_result <- res_DEG[[5]]
gsea_terms <- gsea_result@result[gsea_result@result$p.adjust < 0.05, ]
terms <- c('hsa04110','hsa04610','hsa05150','hsa00350','hsa00830','hsa00140')
gseaList <- lapply(terms, function(x){
gseaNb(object = gsea_result,
geneSetID = x,
addPval = T,
pvalX = 0.5,
pvalY = 0.75,
pCol = 'black',
pHjust = 0)
})
# pdf('./gene_differential_analysis/signaling_pathways.pdf',width = 12,height = 8)
cowplot::plot_grid(plotlist = gseaList,ncol = 3,align = 'hv')

# dev.off()
# netplot top10
geneList<-res_DEG[[2]]$logFC
names(geneList)=res_DEG[[2]]$ENTREZID
geneList=sort(geneList,decreasing = T)
# pdf('./gene_differential_analysis/netplot_signaling_pathways.pdf',width = 6,height = 6)
cnetplot(res_DEG[[5]],
layout = igraph::layout_with_fr,
node_label='category',
colorEdge=TRUE,
colorCategory = "p.adjust",
showCategory = 10, foldChange = geneList, circular = TRUE)

# dev.off()
# interplay plot
otu_data <- read.xlsx('./00_data_preparation/otu_genus_cpm_table.xlsx',sheet = 2,rowNames = TRUE) %>% mutate(across(where(is.character), as.numeric)) %>%log1p() %>% t()
gene_data <- read.xlsx( './00_data_preparation/gene_table.xlsx',sheet = 1,rowNames = TRUE) %>% mutate_all(as.numeric) %>% t()
otu_feature <- read.csv(file = './microbial_feature/Core_features_coefficients.csv',header = TRUE) %>% dplyr::filter(pval<0.05) %>% .$feature %>% unique()
gene_feature_hsa04110 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa04110'], split = "/"))
gene_feature_hsa04610 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa04610'], split = "/"))
gene_feature_hsa05150 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa05150'], split = "/"))
gene_feature_hsa00350 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa00350'], split = "/"))
gene_feature_hsa00830 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa00830'], split = "/"))
gene_feature_hsa00140 <- unlist(strsplit(res_DEG[[6]]$core_enrichment[res_DEG[[6]]$ID=='hsa00140'], split = "/"))
fun_interplay_plot <- function(otu_data,otu_feature,gene_data,gene_feature){
otu_matrix <- as.matrix(otu_data[,otu_feature])
gene_matrix <- as.matrix(gene_data[,gene_feature])
correlation_matrix <- Hmisc::rcorr(otu_matrix, gene_matrix, type = "spearman")
r_values <- correlation_matrix$r[1:ncol(otu_matrix), (ncol(otu_matrix)+1):(ncol(otu_matrix)+ncol(gene_matrix))]
p_values <- correlation_matrix$P[1:ncol(otu_matrix), (ncol(otu_matrix)+1):(ncol(otu_matrix)+ncol(gene_matrix))]
p_adjusted <- matrix(p.adjust(p_values, method = "fdr"),
nrow = nrow(p_values), ncol = ncol(p_values))
rownames(p_adjusted) <- rownames(r_values)
colnames(p_adjusted) <- colnames(r_values)
edges <- which(abs(r_values)>0.3 & p_values < 0.05, arr.ind = TRUE)
edge_list <- data.frame(
from = rownames(r_values)[edges[,1]],
to = colnames(r_values)[edges[,2]],
cor = r_values[edges],
p = p_adjusted[edges]
)
network_graph <- graph_from_data_frame(edge_list, directed = FALSE)
V(network_graph)$shape <- ifelse(grepl("g__", V(network_graph)$name), "circle", "square")
V(network_graph)$color <- ifelse(grepl("g__", V(network_graph)$name), "#00A1D5FF", "#DF8F44FF")
V(network_graph)$size <- 8
E(network_graph)$width <- E(network_graph)$cor * 5
get_edge_colors <- function(cor_values) {
col_pal <- colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(100)
col_index <- round((cor_values + 1)/2 * (100-1)) + 1
colors <- col_pal[col_index]
return(colors)
}
edge_colors <- get_edge_colors(edge_list$cor)
E(network_graph)$color <- edge_colors
plot(network_graph,
layout = layout_in_circle(network_graph), # 使用Fruchterman-Reingold布局
vertex.label.cex = 0.85,
edge.curved = 0.2,
vertex.frame.color = NA,
vertex.label.color = "black",
main = "OTU-Gene Correlation Network"
)
legend("bottomright",
legend = c("OTU", "Gene"),
fill = c("#00A1D5FF", "#DF8F44FF"),
cex = 0.8
)
color_gradient <- colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(100)
legend_image <- as.raster(matrix(rev(color_gradient), ncol=1))
rasterImage(legend_image, 0.9, 0.6, 1.0, 0.9)
text(x = 0.9, y = 1.0, labels = "Correlation", cex = 0.9)
}
# pdf('./gene_differential_analysis/microbe_gene_interplay_plot.pdf',width = 6,height = 6)
fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa04110)

fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa04610)

fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa05150)

fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa00350)

fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa00830)

fun_interplay_plot(otu_data,otu_feature,gene_data,gene_feature_hsa00140)

# dev.off()